package com.qianyan.webcatch;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;

/**
 * Parses a locally saved blog-list HTML page with jsoup and prints selected
 * fields of every blog entry to standard output.
 *
 * <p>Created by lujianing on 2014-7-23.
 */
public class OscBlogPaser {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH = "F://blogs_20140723.html";

    /**
     * Loads the HTML file, selects every element matching
     * {@code div.blogList div.blog} and prints each one.
     *
     * @param args optional; when present, {@code args[0]} is the path of the
     *             HTML file to parse, otherwise {@link #DEFAULT_INPUT_PATH} is used
     * @throws IOException if the file cannot be read or parsed by jsoup
     */
    public static void main(String[] args) throws IOException {
        // Fall back to the original hard-coded location so existing invocations keep working.
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        Document document = Jsoup.parse(new File(inputPath), "UTF-8", "");

        Elements blogs = document.select("div.blogList div.blog");
        for (Element blog : blogs) {
            printBlog(blog);
        }
    }

    /**
     * Prints the title, date, catalog, optional tags and content children of
     * one blog entry, followed by a separator line.
     *
     * <p>Title, date, catalog and content are looked up with {@code get(0)},
     * so an entry lacking one of them still aborts the run with an exception,
     * exactly as before. Only the tags are treated as optional.
     *
     * @param blog a single {@code div.blog} element from the list page
     */
    private static void printBlog(Element blog) {
        // Blog title: own text of the first sibling of the "a.top" anchor.
        System.out.println(blog.select("a.top").get(0).siblingElements().get(0).ownText());

        // Blog creation time.
        System.out.println(blog.select(".date").get(0).ownText());

        System.out.println(blog.select(".catalog").get(0).ownText());

        // Tags are optional: check for absence explicitly instead of relying on
        // a swallowed exception, which would also hide unrelated failures.
        Element tags = blog.select(".tags").first();
        if (tags != null) {
            System.out.println(tags.ownText());
        }

        System.out.println(blog.select(".content").get(0).children());

        System.out.println("-----------------------------------------");
    }
}
